This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Load the philosophy corpus and list every author it covers
data <- read.csv("~/gr5243/philosophy_data.csv")
unique(data[["author"]])
[1] "Plato" "Aristotle" "Locke" "Hume" "Berkeley" "Spinoza" "Leibniz"
[8] "Descartes" "Malebranche" "Russell" "Moore" "Wittgenstein" "Lewis" "Quine"
[15] "Popper" "Kripke" "Foucault" "Derrida" "Deleuze" "Merleau-Ponty" "Husserl"
[22] "Heidegger" "Kant" "Fichte" "Hegel" "Marx" "Lenin" "Smith"
[29] "Ricardo" "Keynes" "Epictetus" "Marcus Aurelius" "Nietzsche" "Wollstonecraft" "Beauvoir"
[36] "Davis"
unique(data$original_publication_date)
[1] -350 -320 1689 1739 1779 1713 1710 1677 1637 1641 1674 1921 1912 1910 1953 1985 1950 1959 1972 1975 1963 1961 1966 1967 1968 1945 1936
[28] 1907 1927 1788 1790 1781 1798 1817 1807 1820 1883 1848 1862 1776 125 170 1888 1886 1887 1792 1949 1981
library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(tidytext)
library(ggplot2)
# Tokenize each row's text into one word per row, then count word
# frequency per (author, word) and drop common English stop words.
data_words <- data %>%
  unnest_tokens(word, tokenized_txt)

# count(author, word) replaces the more verbose group_by() + count()
data_count <- data_words %>%
  count(author, word)

data(stop_words)
# Explicit `by` avoids the "Joining, by = ..." message and fails loudly
# if either table's schema changes.
data_count_use <- data_count %>%
  anti_join(stop_words, by = "word")
Joining, by = "word"
# Single most frequent (non-stop) word per author.  slice_max() is the
# modern replacement for the superseded top_n(); its default
# with_ties = TRUE keeps tied top counts, matching top_n() behaviour.
topwords <- data_count_use %>%
  group_by(author) %>%
  slice_max(n, n = 1) %>%
  ungroup()

# Note: the original chained xlab(NULL) then xlab("Word"); only the
# last call wins, so the redundant xlab(NULL) is dropped.
ggplot(topwords, aes(x = reorder(word, n), y = n, fill = author)) +
  geom_col() +
  coord_flip() +
  ggtitle("Top common word by each author") +
  xlab("Word") +
  ylab("Frequency")
# Average sentence length per author.  na.rm = TRUE so a single missing
# sentence_length cannot turn an author's mean into NA.
data_avg <- data %>%
  group_by(author) %>%
  summarize(avg_sentence_length = mean(sentence_length, na.rm = TRUE))

# geom_col() is the idiomatic shorthand for geom_bar(stat = "identity")
ggplot(data_avg, aes(x = avg_sentence_length, y = author)) +
  geom_col() +
  ggtitle("Average sentence length by author") +
  xlab("Average sentence length") +
  ylab("Author")
# Average sentence length per school.  na.rm = TRUE guards against
# missing sentence_length values producing NA means.
data_avgschool <- data %>%
  group_by(school) %>%
  summarize(avg_sentence_length1 = mean(sentence_length, na.rm = TRUE))

# geom_col() is the idiomatic shorthand for geom_bar(stat = "identity")
ggplot(data_avgschool, aes(x = avg_sentence_length1, y = school)) +
  geom_col() +
  ggtitle("Average sentence length by school") +
  xlab("Average sentence length") +
  ylab("School")
# Word frequencies for Descartes only, stop words removed.
descartes <- data %>% filter(author == "Descartes")
descartes_word <- descartes %>%
  unnest_tokens(word, tokenized_txt) %>%
  count(word, sort = TRUE)
# Explicit join key silences the message and catches schema drift.
descartes_word_use <- descartes_word %>%
  anti_join(stop_words, by = "word")
Joining, by = "word"
# 20 most and least frequent Descartes words.  descartes_bot is kept
# for reference/inspection; only the top 20 are plotted.
descartes_top <- head(descartes_word_use, 20)
descartes_bot <- tail(descartes_word_use, 20)

# The original chained xlab(NULL) then xlab("Word"); only the final
# call takes effect, so the redundant xlab(NULL) is removed.
ggplot(descartes_top, aes(x = reorder(word, n), y = n)) +
  geom_col() +
  ggtitle("Top 20 words by Descartes") +
  xlab("Word") +
  ylab("Frequency")
# Word frequencies for Wittgenstein only, stop words removed.
wittgenstein <- data %>% filter(author == "Wittgenstein")
wittgenstein_word <- wittgenstein %>%
  unnest_tokens(word, tokenized_txt) %>%
  count(word, sort = TRUE)
# Explicit join key silences the message and catches schema drift.
wittgenstein_word_use <- wittgenstein_word %>%
  anti_join(stop_words, by = "word")
Joining, by = "word"
# 20 most and least frequent Wittgenstein words.  wittgenstein_bot is
# kept for reference/inspection; only the top 20 are plotted.
wittgenstein_top <- head(wittgenstein_word_use, 20)
wittgenstein_bot <- tail(wittgenstein_word_use, 20)

# Redundant xlab(NULL) (overridden by the later xlab("Word")) removed.
ggplot(wittgenstein_top, aes(x = reorder(word, n), y = n)) +
  geom_col() +
  ggtitle("Top 20 words by Wittgenstein") +
  xlab("Word") +
  ylab("Frequency")
library(tidytext)
library(textdata)

# NRC sentiment lexicon: one row per (word, sentiment category)
get_sentiments("nrc")

# Positive-sentiment words used by Descartes, most frequent first.
# Explicit `by = "word"` avoids the join message and fails loudly if
# either table's columns change.
d_nrc_positive <- get_sentiments("nrc") %>%
  filter(sentiment == "positive")
data_words %>%
  filter(author == "Descartes") %>%
  inner_join(d_nrc_positive, by = "word") %>%
  count(word, sort = TRUE)
Joining, by = "word"
# Negative-sentiment words used by Descartes, most frequent first.
d_nrc_negative <- get_sentiments("nrc") %>%
  filter(sentiment == "negative")
data_words %>%
  filter(author == "Descartes") %>%
  inner_join(d_nrc_negative, by = "word") %>%
  count(word, sort = TRUE)
Joining, by = "word"
# Positive-sentiment words used by Wittgenstein, most frequent first.
w_nrc_positive <- get_sentiments("nrc") %>%
  filter(sentiment == "positive")
data_words %>%
  filter(author == "Wittgenstein") %>%
  inner_join(w_nrc_positive, by = "word") %>%
  count(word, sort = TRUE)
Joining, by = "word"
# Negative-sentiment words used by Wittgenstein, most frequent first.
w_nrc_negative <- get_sentiments("nrc") %>%
  filter(sentiment == "negative")
data_words %>%
  filter(author == "Wittgenstein") %>%
  inner_join(w_nrc_negative, by = "word") %>%
  count(word, sort = TRUE)
Joining, by = "word"
library(sentimentr)

# Per-sentence sentiment scores for Descartes.  Passing the text
# through get_sentences() first is what sentimentr itself recommends
# (see the package warning): it avoids repeating the costly sentence
# boundary disambiguation each time sentiment() is called.
descartes_data <- data[data$author == "Descartes", ]
descartes_data1 <- descartes_data$sentence_spacy
sentiment_scores <- sentiment(get_sentences(descartes_data1))
Warning: Each time `sentiment` is run it has to do sentence boundary disambiguation when a
raw `character` vector is passed to `text.var`. This may be costly of time and
memory. It is highly recommended that the user first runs the raw `character`
vector through the `get_sentences` function.
head(sentiment_scores,20)
library(sentimentr)

# Per-sentence sentiment scores for Wittgenstein.  get_sentences()
# first, per the sentimentr warning, to avoid repeated sentence
# boundary disambiguation on the raw character vector.
wittgenstein_data <- data[data$author == "Wittgenstein", ]
wittgenstein_data1 <- wittgenstein_data$sentence_spacy
sentiment_scores1 <- sentiment(get_sentences(wittgenstein_data1))
Warning: Each time `sentiment` is run it has to do sentence boundary disambiguation when a
raw `character` vector is passed to `text.var`. This may be costly of time and
memory. It is highly recommended that the user first runs the raw `character`
vector through the `get_sentences` function.
head(sentiment_scores1,20)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.